/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2003 - 2004 by Myricom, Inc.  All rights reserved.          *
 *************************************************************************/

static const char __idstring[] = "@(#)$Id: mx_ether.c,v 1.76 2006/12/12 21:14:22 loic Exp $";

#include "mx_arch.h"
#include "mx_misc.h"
#include "mx_instance.h"
#include "mx_malloc.h"
#include "mx_ether_common.h"
#include "mx_stbar.h"
#include "mx_pio.h"
#define mx_printf printk
#include "mx_debug.h"
#include "mx_peer.h"

#ifndef MX_MCP_ETHER_FLAGS_GATEWAY
#define mx_ether_gw_rx(a,b)
#endif


/* compatibility cruft for multiple linux versions */

/*
 * Allocate an ethernet net_device, hiding the allocation API difference
 * between kernel versions.
 *
 * s:   printf-style name template with one integer conversion
 *      (the caller in this file passes "myri%d")
 * err: on 2.6+ kernels set to 0 on success or -ENOMEM on failure;
 *      on older kernels it is handed to dev_alloc() unchanged
 * num: unit number substituted into the template (2.6+ path only)
 *
 * Returns whatever the kernel allocator returned.
 */
static struct net_device *
mx_netdev_alloc(char *s, int *err, int num)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
	char name[IFNAMSIZ];
	struct net_device *dev;

	/* bounded format: a long template or a large unit number must
	   not be allowed to run past the end of name[] */
	snprintf(name, sizeof (name), s, num);
	dev = alloc_netdev(0, name, ether_setup);
	if (!dev)
		*err = -ENOMEM;
	else
		*err = 0;
	return (dev);
#else
	return (dev_alloc(s,err));
#endif
}

/*
 * Release a net_device obtained from mx_netdev_alloc(), using the
 * release call that matches the kernel version it was allocated on.
 */
static void
mx_netdev_free(struct net_device *d)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	kfree(d);
#else
	free_netdev(d);
#endif
}

/* Maximum number of skb frags we expect to see from a well-behaved
   sender: the number of pages spanned by a maximum-size payload
   (MX_MAX_ETHER_MTU less the 14-byte ethernet header), plus 2.
   mx_encap() uses this value as part of the low-water mark at which
   it stops the transmit queue. */

#define MX_MAX_SKB_FRAGS (MX_ATOP(MX_PAGE_ALIGN(MX_MAX_ETHER_MTU - 14))  + 2)

/*
 * Encapsulate an skbuf for DMA.  This is tricky since we support
 * frags, and because iommu's require us to unmap memory.
 *
 * In the ring, the following hints are used by the tx completion
 * code:
 * bus INVALID, skb null: head frag
 * skb null: part of a frag
 * skb non-null: skb
 */

static inline int
mx_encap(struct mx_ether *eth, struct sk_buff *skb)
{
	dma_addr_t bus, frag_bus;
	struct skb_frag_struct *frag;
	mcp_kreq_ether_send_t req_list[MX_MCP_ETHER_MAX_SEND_FRAG + 1];
	mcp_kreq_ether_send_t *req;
	int len;
	int head_frag_num;
	
	int i, idx, avail, frag_cnt, f, err;

	/* leave one slot to keep ring from wrapping */
	avail = NUM_TX - 1 - (eth->tx.req - eth->tx.done);
	frag_cnt = skb_shinfo(skb)->nr_frags;

	if (avail < 1 + 4)
		return -ENOBUFS;

	/* if the 1 + 1 + frag_cnt is greater than available free
	   request slots, then  we will run out of space in
	   the ring.  This should never happen, as the
	   queue should have been stopped previously.
	*/

	if ((avail < (1 + 4 + frag_cnt))  ||
	    (4 + frag_cnt) > MX_MCP_ETHER_MAX_SEND_FRAG) {
		if (mx_skb_linearize(skb)) {
			return -ENOBUFS;
		}
		frag_cnt = 0;
	}

	i = eth->tx.req;
	req = req_list;
	pci_unmap_addr_set(&eth->tx.info[i & (NUM_TX - 1)], bus, INVALID_DMA_ADDR);
	i++;
	
	/* Give the MCP the dest addr (already in network byte order) */
	req->head.dest_high16 = *(uint16_t *)&skb->data[0];
	req->head.dest_low32 = *(uint32_t *)&skb->data[2];
	req->head.flags = MX_MCP_ETHER_FLAGS_VALID | MX_MCP_ETHER_FLAGS_HEAD;
#ifdef MX_MCP_ETHER_FLAGS_GATEWAY
	if (!(skb->data[0] & 1)) {
		mx_peer_hash_t *bin;
		bin = mx_peer_lookup_eth(htons(req->head.dest_high16), ntohl(req->head.dest_low32), 0);
		if (bin) {
			mx_assert(bin->gw < mx_max_nodes || bin->gw == MX_PEER_INVALID);
			req->head.peer_index = htons(bin->gw);
		} else {
			req->head.peer_index = htons(MX_PEER_INVALID);
		}
		req->head.flags |= MX_MCP_ETHER_FLAGS_GATEWAY;
	}
#endif
	/* Setup checksum offloading, if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		uint16_t pseudo_hdr_offset;
		uint16_t cksum_offset;
		
		pseudo_hdr_offset = (skb->h.raw + skb->csum) - skb->data;
		cksum_offset =  skb->h.raw - skb->data;
		/* FIXME: if first frag cross 4k boundary before csum_off or pseudo_hdr */
		if (pseudo_hdr_offset < MX_MCP_ETHER_MAX_PSEUDO_OFF
		    && pskb_may_pull(skb, pseudo_hdr_offset + 2)
		    && cksum_offset < 254
		    && pskb_may_pull(skb, cksum_offset + 2)) {
			req->head.pseudo_hdr_offset = htons(pseudo_hdr_offset);
			req->head.cksum_offset = htons(cksum_offset);
			req->head.flags |= MX_MCP_ETHER_FLAGS_CKSUM;
		} else {
			/* do it in software */
			uint16_t csum;
			csum = skb_checksum(skb, cksum_offset, 
					    skb->len - cksum_offset, 0);
			csum = csum_fold(csum);
			mx_printf_once("Warning: sum field offset %d >= %d\n",
				       pseudo_hdr_offset, 
				       MX_MCP_ETHER_MAX_PSEUDO_OFF);
			if (pskb_may_pull(skb, pseudo_hdr_offset))
				*(uint16_t*)(skb->h.raw + skb->csum) = csum;
		}
	} else {
		req->head.pseudo_hdr_offset = 0;
		req->head.cksum_offset = 0;
	}
	req++;

	/* map the skbuf */
	idx = i & (NUM_TX - 1);
	bus = pci_map_single(eth->is->arch.pci_dev,
			     skb->data, skb_headlen(skb),
			     PCI_DMA_TODEVICE);
	if (bus == INVALID_DMA_ADDR)
		return -ENXIO;

	eth->tx.info[idx].u.skb = skb;
	len = skb_headlen(skb);

	frag_bus = bus;
	head_frag_num = 0;
	do {
		unsigned frag_len = len;

		head_frag_num += 1;
		mx_assert(head_frag_num <= 4);
		mx_assert(head_frag_num ==1 || (bus & 0xfff) == 0);
		/* don't cross 4K barrier */
		if ((bus & 0xfff) + len > 4096)
			frag_len = 4096 - (bus & 0xfff);

		idx = i & (NUM_TX - 1);

		pci_unmap_addr_set(&eth->tx.info[idx], bus, frag_bus);
		pci_unmap_len_set(&eth->tx.info[idx], len, len);
		/* store DMA address and len */
		req->frag.addr_low = htonl(MX_LOWPART_TO_U32(bus));
		req->frag.addr_high = htonl(MX_HIGHPART_TO_U32(bus));
		req->frag.length = htons((uint16_t)frag_len);
		/* This is used only for debugging */
		req->frag.flags = MX_MCP_ETHER_FLAGS_VALID;

		i++;
		req++;
		frag_bus = INVALID_DMA_ADDR; /* first iteration record whole area */
		len -= frag_len;
		bus += frag_len;
	} while (len > 0);

	if (frag_cnt) {
		/* now attempt to map the frags, if we have any */
		for (f = 0; f < frag_cnt; f++, i++) {
			idx = i & (NUM_TX - 1);
			frag = &skb_shinfo(skb)->frags[f];
			bus = pci_map_page(eth->is->arch.pci_dev,
					   frag->page,
					   frag->page_offset,
					   frag->size,
					   PCI_DMA_TODEVICE);
			if (bus == INVALID_DMA_ADDR) {
				err = -ENXIO;
				goto abort;
			}
			pci_unmap_addr_set(&eth->tx.info[idx], bus, bus);
			pci_unmap_len_set(&eth->tx.info[idx], len, frag->size);
			req->frag.addr_low = htonl(MX_LOWPART_TO_U32(bus));
			req->frag.addr_high = htonl(MX_HIGHPART_TO_U32(bus));
			req->frag.length = htons((uint16_t)frag->size);
			/* This is used only for debugging */
			req->frag.flags = MX_MCP_ETHER_FLAGS_VALID;
			req++;
		}
	}


	/* account for slots used by head + main skbuf + any frags*/
	avail -= (1 + head_frag_num + frag_cnt); 

	/* This check must go before the device can send
	   the frame so that we are always assured of getting
	   at least one tx complete irq after the  queue
	   has been stopped. */

	if (avail < MX_MAX_SKB_FRAGS + 1 + 4)
		netif_stop_queue(eth->arch.dev);

	/* terminate the request chain */
	req[-1].frag.flags |= MX_MCP_ETHER_FLAGS_LAST;

	/* Tell the lanai about it */
	mx_ether_submit_tx_req(eth, req_list, 1 + head_frag_num + frag_cnt);
	return 0;

	
abort:
	for (i--; i != eth->tx.req; i--) {
		idx = i & (NUM_TX - 1);
		if (eth->tx.info[idx].u.skb != 0 ) {
			eth->tx.info[idx].u.skb = 0;
			pci_unmap_single(eth->is->arch.pci_dev,
					 pci_unmap_addr(&eth->tx.info[idx], bus),
					 pci_unmap_len(&eth->tx.info[idx],  len),
					 PCI_DMA_TODEVICE);
		} else if (pci_unmap_addr(&eth->tx.info[idx], bus) != INVALID_DMA_ADDR) {
			pci_unmap_page(eth->is->arch.pci_dev,
				       pci_unmap_addr(&eth->tx.info[idx], bus),
				       pci_unmap_len(&eth->tx.info[idx],  len),
				       PCI_DMA_TODEVICE);
		}
	}
	return err;
}

/*
 * hard_start_xmit entry point.  Frames shorter than ETH_ZLEN are padded
 * first; the frame is then queued through mx_encap().
 *
 * Returns 0 when the frame was queued or when padding failed (counted
 * in tx_dropped), 1 when mx_encap() reported an error.
 */
static int
mx_ether_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct mx_ether *eth = dev->priv;

	if (skb->len < ETH_ZLEN) {
		if (mx_skb_padto(skb, ETH_ZLEN) != 0) {
			eth->arch.stats.tx_dropped += 1;
			return 0;
		}
		skb->len = ETH_ZLEN;
	}

	if (mx_encap(eth, skb) != 0)
		return 1;

	dev->trans_start = jiffies;
	return 0;
}


/*
 * Transmit completion.  Retires every ring slot from eth->tx.done up to
 * (not including) mcp_index: a slot holding an skb has its linear
 * mapping released and the skb freed; a slot holding only a valid bus
 * address is a page frag and is unmapped as a page; any other slot
 * owns nothing.  Afterwards a stopped queue is restarted once fewer
 * than a quarter of the ring is in use.
 */
void
mx_ether_tx_done(mx_instance_state_t *is, uint32_t mcp_index)
{
	struct mx_ether *eth = is->ether;

	while (eth->tx.done != mcp_index) {
		int slot = eth->tx.done & (NUM_TX - 1);
		dma_addr_t dma = pci_unmap_addr(&eth->tx.info[slot], bus);
		unsigned dma_len = pci_unmap_len(&eth->tx.info[slot], len);
		struct sk_buff *sent = eth->tx.info[slot].u.skb;

		/* retire the slot before releasing what it held */
		eth->tx.info[slot].u.skb = 0;
		eth->tx.done++;

		if (sent != NULL) {
			/* linear part of an skb */
			pci_unmap_single(eth->is->arch.pci_dev,
					 dma, dma_len, PCI_DMA_TODEVICE);
			eth->arch.stats.tx_bytes += sent->len;
			eth->arch.stats.tx_packets++;
			dev_kfree_skb_irq(sent);
			continue;
		}
		if (dma != INVALID_DMA_ADDR) {
			/* page fragment */
			pci_unmap_page(eth->is->arch.pci_dev,
				       dma, dma_len, PCI_DMA_TODEVICE);
		}
	}

	/* restart a stopped queue once the ring has drained enough */
	if (netif_queue_stopped(eth->arch.dev)) {
		if (eth->tx.req - eth->tx.done < (NUM_TX >> 2))
			netif_wake_queue(eth->arch.dev);
	}
}

/*
 * Allocate an skb for receive.  We must ensure that
 * big buffers are aligned on a 4KB boundary
 *
 */

/*
 * Allocate, align and DMA-map a receive skb for slot idx of ring rx,
 * and record its bus address in the shadow descriptor.
 *
 * Buffers larger than 4096 bytes are aligned to a 4KB boundary; smaller
 * ones to 16 bytes.  Whether or not allocation succeeds, every fourth
 * slot (idx & 3 == 3) pushes the group of four shadow descriptors to
 * the ring and writes rx->cnt to the MCP counter.
 *
 * Returns 0 on success, -ENOBUFS if no skb could be allocated (also
 * counted in rx->alloc_fail), -ENXIO if the DMA mapping failed.
 */
static inline int
mx_get_buf(struct net_device *dev, struct mx_ether *eth, 
	   mx_ether_rx_buf_t *rx, int idx, int bytes, int gfp_mask)
{
	struct sk_buff *nskb;
	dma_addr_t dma;
	uintptr_t align, target, headroom;
	int nbytes, rc = 0;

	/* The alignment doubles as the rounding mask, so it must be a
	   non-zero power of 2; small buffers therefore carry an extra
	   16 bytes */
	align = (bytes > 4096) ? 4096 : 16;

	nbytes = SKB_ROUNDUP(bytes + align + MX_MCP_ETHER_PAD);
	nskb = alloc_skb(nbytes + 16, gfp_mask);
	if (nskb == NULL) {
		rx->alloc_fail++;
		rc = -ENOBUFS;
		goto flush;
	}

	/* like dev_alloc_skb(), leave at least 16 bytes of headroom,
	   then round the data pointer down to the alignment boundary */
	target = (uintptr_t)(nskb->data) + 16 + align;
	headroom = (target & ~(align - 1)) - (uintptr_t)(nskb->data);
	skb_reserve(nskb, headroom);

	/* only the part the device writes needs to be mapped */
	nbytes = bytes + MX_MCP_ETHER_PAD;
	dma = pci_map_single(eth->is->arch.pci_dev,
			     nskb->data, nbytes, PCI_DMA_FROMDEVICE);
	if (dma == INVALID_DMA_ADDR) {
		dev_kfree_skb_any(nskb);
		rc = -ENXIO;
		goto flush;
	}

	/* the mapping must not straddle a 4GB boundary */
	mx_assert((uint32_t)(nbytes + MX_LOWPART_TO_U32(dma)) > 
		  (uint32_t)(MX_LOWPART_TO_U32(dma)));

	rx->info[idx].u.skb = nskb;
	pci_unmap_addr_set(&rx->info[idx], bus, dma);
	pci_unmap_len_set(&rx->info[idx], len, nbytes);
	rx->shadow[idx].addr_low = htonl(MX_LOWPART_TO_U32(dma));
	rx->shadow[idx].addr_high = htonl(MX_HIGHPART_TO_U32(dma));

flush:
	/* descriptors go to the mcp four at a time
	   (4 descriptors == 32 bytes for Z fast-writes) */
	if ((idx & 3) == 3) {
		mx_pio_memcpy(&rx->ring[idx - 3], &rx->shadow[idx - 3],
			    4 * sizeof (*rx->ring), 0);
		MX_STBAR();
		MX_PIO_WRITE(rx->lanai_cnt, htonl(rx->cnt));
	}
	return rc;
}


int
mx_get_buf_big(struct mx_ether *eth, int idx, int gfp_mask)
{
	struct mx_ether_buffer_info *info;
	dma_addr_t bus;
	struct page *page;
	mx_ether_rx_buf_t *rx = &eth->rx_big;
	int retval = 0;

	page = alloc_page(gfp_mask);
	if (!page) {
		retval = -ENOMEM;
		goto done;
	}
	bus = pci_map_page(eth->is->arch.pci_dev,
			   page, 0, PAGE_SIZE,
			   PCI_DMA_FROMDEVICE);
	if (bus == INVALID_DMA_ADDR) {
		put_page(page);
		retval = -ENOMEM;
		goto done;
	}
	info = eth->rx_big.info + idx;
	info->u.page = page;
	pci_unmap_addr_set(info, bus, bus);
	pci_unmap_len_set(info, len, PAGE_SIZE);
	rx->shadow[idx].addr_low = htonl(MX_LOWPART_TO_U32(bus));
	rx->shadow[idx].addr_high = htonl(MX_HIGHPART_TO_U32(bus));

done:
	return retval;
}                         


/*
 * Deliver one received frame that occupies a single skb-backed buffer.
 *
 * The slot at rx->cnt is consumed and a replacement buffer of `bytes`
 * bytes is requested first.  If that fails the frame is dropped
 * (rx_dropped) and the old buffer stays in the slot.  Otherwise the old
 * buffer is unmapped, trimmed to `len` bytes of payload and passed to
 * netif_rx().  When `flags` matches eth->csum_flag the checksum in
 * `csum` is attached as CHECKSUM_COMPLETE.
 */
static inline void 
mx_ether_rx_done(struct mx_ether *eth, struct net_device *dev,
		 mx_ether_rx_buf_t *rx, int bytes, int len, 
		 int csum, int flags)
{
	struct sk_buff *rx_skb;
	dma_addr_t old_dma;
	int slot, old_len;

	slot = rx->cnt & (NUM_RX - 1);
	rx->cnt++;

	/* remember what the slot held before it is refilled */
	rx_skb = rx->info[slot].u.skb;
	old_dma = pci_unmap_addr(&rx->info[slot], bus);
	old_len = pci_unmap_len(&rx->info[slot], len);

	if (mx_get_buf(dev, eth, rx, slot, bytes, GFP_ATOMIC) != 0) {
		/* no replacement: drop the frame, the old skbuf is re-cycled */
		eth->arch.stats.rx_dropped += 1;
		return;
	}

	pci_unmap_single(eth->is->arch.pci_dev,
			 old_dma, old_len,
			 PCI_DMA_FROMDEVICE);

	/* the mcp implicitly skips the first MX_MCP_ETHER_PAD bytes so
	 * that the packet is properly aligned; expose them, then strip
	 * them again */
	skb_put(rx_skb, len + MX_MCP_ETHER_PAD);
	mx_ether_gw_rx(eth->is, rx_skb->data);
	skb_pull(rx_skb, MX_MCP_ETHER_PAD);

	rx_skb->protocol = eth_type_trans(rx_skb, dev);
	rx_skb->dev = dev;
	if (eth->csum_flag & flags) {
		rx_skb->csum = ntohs((uint16_t)csum);
		rx_skb->ip_summed = CHECKSUM_COMPLETE;
	}

	netif_rx(rx_skb);
	dev->last_rx = jiffies;
	eth->arch.stats.rx_packets += 1;
	eth->arch.stats.rx_bytes += len;
}

/*
 * Receive completion for the small-buffer ring: exactly one buffer per
 * frame, replaced with a fresh buffer of MX_SMALL_THRESH bytes.
 */
void 
mx_ether_rx_done_small(mx_instance_state_t *is, int count, int len, 
		       int csum, int flags)
{
	struct mx_ether *eth;

	mx_assert(count == 1);
	eth = is->ether;
	mx_ether_rx_done(eth, eth->arch.dev, &eth->rx_small,
			 MX_SMALL_THRESH, len, csum, flags);
}

/*
 * Receive completion for the big-buffer ring.
 *
 * Without mx_ether_rx_frags each frame occupies one skb-backed buffer
 * and is handled by mx_ether_rx_done().  With mx_ether_rx_frags the
 * frame occupies `count` page buffers: each page is replaced in the
 * ring and attached to a freshly allocated skb as a frag, and the
 * first 64 bytes are copied into the skb's linear area so the header
 * can be parsed.
 *
 * If the skb or a replacement page cannot be allocated the frame is
 * dropped (rx_dropped).  In every case the ring counter is advanced by
 * `count` at the end.
 */
void 
mx_ether_rx_done_big(mx_instance_state_t *is, int count, int len, 
		       int csum, int flags)
{
	struct mx_ether *eth;
	struct net_device *dev;
	mx_ether_rx_buf_t *rx;
	int bytes;
	struct sk_buff *skb;
	int seg, idx;

	eth = is->ether;
	dev = eth->arch.dev;
	rx = &eth->rx_big;
	if (!mx_ether_rx_frags) {
		/* one skb-backed buffer per frame */
		mx_assert(count == 1);
		bytes = dev->mtu + ETH_HLEN;
		mx_ether_rx_done(eth, dev, rx, bytes, len, csum, flags);
		return;
	}

	mx_assert(count <= MAX_SKB_FRAGS);
	/* 64 bytes of linear space: room for the header copy made below */
	skb = alloc_skb(64, GFP_ATOMIC);
	if (!skb) {
		eth->arch.stats.rx_dropped += 1;
		goto done;
	}
	len += MX_MCP_ETHER_PAD; /* the mcp only gives the "payload" */
	skb->len = skb->data_len = len;
	for (seg = 0; seg < count; seg++) {
		struct mx_ether_buffer_info old_buf;
		int idx;

		/* snapshot the slot, then try to refill it; the old page is
		   only taken once a replacement is in place */
		idx = (rx->cnt + seg) & (NUM_RX - 1);
		old_buf = rx->info[idx];
		if (mx_get_buf_big(eth, idx, GFP_ATOMIC) != 0) {
			kfree_skb(skb);
			eth->arch.stats.rx_dropped += 1;
			goto done;
		}
		pci_unmap_page(eth->is->arch.pci_dev,
			       pci_unmap_addr(&old_buf, bus),
			       pci_unmap_len(&old_buf, len),
			       PCI_DMA_FROMDEVICE);
		/* attach the old page as the next frag; `len` counts down
		   the bytes still to be accounted for */
		skb_shinfo(skb)->frags[seg].page = old_buf.u.page;
		skb_shinfo(skb)->frags[seg].page_offset = 0;
		skb_shinfo(skb)->frags[seg].size =
			(len < PAGE_SIZE ? len : PAGE_SIZE);
		len -= skb_shinfo(skb)->frags[seg].size;
		skb_shinfo(skb)->nr_frags += 1;
	}
	mx_assert(len == 0);
	if (eth->csum_flag & flags) {
		skb->csum = ntohs((uint16_t)csum);
		skb->ip_summed = CHECKSUM_COMPLETE;
	}

	/* pskb_may_pull is not available in irq context, but
	   skb_pull() (for ether_pad and eth_type_trans()) requires
	   the beginning of the packet in skb_headlen(), move it
	   manually */
	mx_assert(MX_SMALL_THRESH >= 64 && skb->len >= MX_SMALL_THRESH);

	/* copy the first 64 bytes into the linear area and shrink the
	   first frag by the same amount */
	memcpy(skb->data, page_address(skb_shinfo(skb)->frags[0].page), 64);
	skb->tail += 64;
	skb->data_len -= 64; 

	skb_shinfo(skb)->frags[0].page_offset = 64;
	skb_shinfo(skb)->frags[0].size -= 64;

	mx_ether_gw_rx(eth->is, skb->data);
	skb_pull(skb, MX_MCP_ETHER_PAD);
	skb->protocol = eth_type_trans(skb, dev);
	skb->dev = dev;
	dev->last_rx = jiffies;
	eth->arch.stats.rx_packets += 1;
	eth->arch.stats.rx_bytes += skb->len;
	netif_rx(skb);

done:
	/* advance the ring by `count` slots on every path, success or drop */
	while (count) {
		idx = (rx->cnt) & (NUM_RX - 1);
		rx->cnt++;
		/* copy 4 descriptors to the mcp at a time */
		if ((idx & 3) == 3) {
			/* use a fast PIO copy that flushes every 32 bytes */
			mx_pio_memcpy(&rx->ring[idx - 3], &rx->shadow[idx - 3], 
				    4 * sizeof (*rx->ring), 0);
			MX_STBAR();
			MX_PIO_WRITE(rx->lanai_cnt, htonl(rx->cnt));
		}
		count--;
	}
	return;

}

/*
 * Stop the interface: disable transmit, tell the MCP that ethernet is
 * down, then release every receive buffer and every transmit still
 * outstanding in the ring.
 *
 * Returns 0 on success, or -ENOTTY when the receive rings were never
 * allocated or the device is neither running nor in the open-failed
 * state.
 */
static int
mx_ether_close(struct net_device *dev)
{
	dma_addr_t bus;
	struct sk_buff *skb;
	struct mx_ether *eth;
	struct page *page;
	uint32_t dont_care;
	int i, unmap_len;

	eth = dev->priv;

	/* if buffers not alloced, give up */
	if (!eth->rx_big.shadow)
		return -ENOTTY;

	/* if the device not running give up */
	if (eth->running != MX_ETH_RUNNING &&
	    eth->running != MX_ETH_OPEN_FAILED)
		return -ENOTTY;

	netif_tx_disable(dev);
	mx_lanai_command(eth->is, MX_MCP_CMD_ETHERNET_DOWN,
			 0, 0, 0, &dont_care, &eth->cmd_sync);
	eth->running = MX_ETH_STOPPED;

	/* free recvs */
	for (i = 0; i < NUM_RX; i++) {
		eth->rx_small.shadow[i].addr_low = 0;
		eth->rx_small.shadow[i].addr_high = 0;
		skb = eth->rx_small.info[i].u.skb;	
		bus = pci_unmap_addr(&eth->rx_small.info[i], bus);
		unmap_len = pci_unmap_len(&eth->rx_small.info[i], len);
		eth->rx_small.info[i].u.skb = 0;
		if (skb) {
			pci_unmap_single(eth->is->arch.pci_dev,
					 bus, unmap_len,
					 PCI_DMA_FROMDEVICE);
			dev_kfree_skb(skb);
		}

		eth->rx_big.shadow[i].addr_low = 0;
		eth->rx_big.shadow[i].addr_high = 0;
		/* u.skb and u.page are both read from the same slot;
		   which one is meaningful depends on mx_ether_rx_frags */
		skb = eth->rx_big.info[i].u.skb;
		page = eth->rx_big.info[i].u.page;
		bus = pci_unmap_addr(&eth->rx_big.info[i], bus);
		unmap_len = pci_unmap_len(&eth->rx_big.info[i], len);
		eth->rx_big.info[i].u.skb = 0;
		if (skb) {
			if (mx_ether_rx_frags) {
				pci_unmap_page(eth->is->arch.pci_dev,
					       bus, unmap_len,
					       PCI_DMA_FROMDEVICE);
				put_page(page);
			} else {
				pci_unmap_single(eth->is->arch.pci_dev,
						 bus, unmap_len,
						 PCI_DMA_FROMDEVICE);
				dev_kfree_skb(skb);
			}
		}
	}

	/* free transmits */

	while (eth->tx.done != eth->tx.req) {
		i = eth->tx.done & (NUM_TX - 1);
		bus = pci_unmap_addr(&eth->tx.info[i], bus);
		unmap_len = pci_unmap_len(&eth->tx.info[i], len);
		skb = eth->tx.info[i].u.skb;
		eth->tx.done++;
		if (skb) {
			eth->tx.info[i].u.skb = 0;
			/* release the DMA mapping before the memory it
			   covers is handed back, as the tx completion
			   path does */
			pci_unmap_single(eth->is->arch.pci_dev,
					 bus,
					 unmap_len,
					 PCI_DMA_TODEVICE);
			dev_kfree_skb(skb);
			eth->arch.stats.tx_dropped += 1;
		} else if (bus != INVALID_DMA_ADDR) {
			pci_unmap_page(eth->is->arch.pci_dev,
				       bus,
				       unmap_len,
				       PCI_DMA_TODEVICE);
		}
	}
	mx_ether_close_common(eth->is);
	return 0;
}

static int
mx_ether_open(struct net_device *dev)
{
	int error, i, mx_big_pow2, mx_big_thresh;
	uint32_t dont_care;
	struct mx_ether *eth;

	eth = dev->priv;

	if (eth->running != MX_ETH_STOPPED)
		return 0;

	if (mx_ether_rx_frags) {
		mx_big_thresh = PAGE_SIZE;
	} else {
		/* Firmware needs the big buff size as a power of 2.  Lie and
		   tell him the buffer is larger, because we only use 1
		   buffer/pkt, and the mtu will prevent overruns */

		mx_big_pow2 = dev->mtu + MX_MCP_ETHER_PAD + ETH_HLEN;
		while ((mx_big_pow2 & (mx_big_pow2 - 1)) != 0)
			mx_big_pow2++;
		mx_big_thresh = mx_big_pow2;
	}

	error = mx_ether_open_common(eth->is, dev->mtu + ETH_HLEN,
				     MX_SMALL_THRESH, mx_big_thresh);

	if (error) {
		MX_WARN(("%s: mx_ether_open_common() failed, errno = %d\n",
			 dev->name, error));
		goto abort_with_nothing;
	}

	/* allocate recvs */
	for (i = 0; i < NUM_RX; i++) {
		error = mx_get_buf(dev, eth, &eth->rx_small, i, 
				   MX_SMALL_THRESH, GFP_KERNEL);
		if (error) {
			MX_WARN(("%s: Could not alloc small recv buffer %d, errno = %d\n",
				 dev->name, i, error));
			goto abort_with_open;
		}

		if (mx_ether_rx_frags) {
			error = mx_get_buf_big(eth, i, GFP_KERNEL);
		} else {
			error = mx_get_buf(dev, eth, &eth->rx_big, i, 
					   dev->mtu + ETH_HLEN, GFP_KERNEL);
		}
		if (error) {
			MX_WARN(("%s: Could not alloc big recv buffer %d, errno = %d\n",
				 dev->name, i, error));
			goto abort_with_open;
		}

	}

	if (mx_ether_rx_frags) {
		for (eth->rx_big.cnt = 0; eth->rx_big.cnt < NUM_RX;
		     eth->rx_big.cnt++) {
			mx_pio_memcpy(&eth->rx_big.ring[eth->rx_big.cnt],
				    &eth->rx_big.shadow[eth->rx_big.cnt], 
				    sizeof (eth->rx_big.ring[0]), 0);
			
		}
	} else {
		eth->rx_big.cnt = NUM_RX;
	}
	eth->rx_small.cnt = NUM_RX;
	MX_PIO_WRITE(eth->rx_small.lanai_cnt, htonl(eth->rx_small.cnt));
	MX_PIO_WRITE(eth->rx_big.lanai_cnt, htonl(eth->rx_big.cnt));

	/* tell the mcp about this */
	error = mx_lanai_command(eth->is, MX_MCP_CMD_ETHERNET_UP,
				 0, 0, 0, &dont_care, &eth->cmd_sync);

	if (error) {
		MX_WARN(("%s: unable to start ethernet\n", dev->name));
		goto abort_with_open;
	}
	mx_ether_start_common(eth->is, dev->mtu + ETH_HLEN, 
			      MX_SMALL_THRESH, mx_big_thresh);
	eth->running = MX_ETH_RUNNING;
	netif_wake_queue(dev);
	return 0;
	
abort_with_open:
	eth->running = MX_ETH_OPEN_FAILED;
	mx_ether_close(dev);
		
abort_with_nothing:
	eth->running = MX_ETH_STOPPED;
	return error;
}

static int
mx_ether_change_mtu (struct net_device *dev, int new_mtu)
{
	struct mx_ether *eth = dev->priv;
	int error = 0;

	if ((new_mtu < 68) || (ETH_HLEN + new_mtu > MX_MAX_ETHER_MTU)) {
		MX_NOTE(("%s: new mtu (%d) is not valid\n",
			 dev->name, new_mtu));
		return -EINVAL;
	}
	MX_INFO(("%s: changing mtu from %d to %d\n",
		 dev->name, dev->mtu, new_mtu));
	if (eth->running && (new_mtu > dev->mtu)) {
		/* if we increase the mtu on an active device, we must
		   ensure that all buffers provided to the MCP are
		   of adequate length */
		mx_ether_close(dev);
		dev->mtu = new_mtu;
		mx_ether_open(dev);
	}  else {
		error = mx_mcpi.set_param(eth->is->id, 
					  eth->is->lanai.sram, 
					  "ethernet_mtu", 
					  new_mtu + ETH_HLEN);	
		if (!error) {
			dev->mtu = new_mtu;
		}
	}
	return error;
}

/* get_stats entry point: hand back the counters kept in the driver's
   private state. */
static struct net_device_stats *
mx_ether_get_stats(struct net_device *dev)
{
	struct mx_ether *eth;

	eth = dev->priv;
	return &eth->arch.stats;
}

/* set_multicast_list entry point: only the IFF_PROMISC bit of the
   device flags is forwarded to the common code. */
static void
mx_ether_set_multicast_list(struct net_device *dev)
{
	mx_ether_set_promisc_common(dev->priv, dev->flags & IFF_PROMISC);
}

/*
 * tx_timeout entry point.  Nothing is recovered here; if the lanai is
 * not reported dead, the transmit ring indices are logged.
 */
void
mx_ether_timeout(struct net_device *dev)
{
	struct mx_ether *eth = dev->priv;

	if (mx_is_dead(eth->is))
		return;

	MX_WARN(("%s:mx_ether_timeout called, but lanai is running!\n",
		 dev->name));
	MX_WARN(("%s: tx req = 0x%x, tx done = 0x%x, NUM_TX = 0x%lx\n",
		 dev->name, eth->tx.req, eth->tx.done, NUM_TX));
}

/*
 * set_mac_address entry point.  `addr` is treated as a struct sockaddr
 * whose sa_data holds the new 6-byte address; it is copied into the
 * net_device and then passed to the common code.  Always returns 0.
 */
int
mx_ether_set_mac_address (struct net_device *dev, void *addr)
{
	struct mx_ether *eth = dev->priv;
	struct sockaddr *new_addr = addr;

	/* update the dev structure first, then the common state from it */
	bcopy(new_addr->sa_data, dev->dev_addr, 6);
	mx_ether_set_mac_address_common(eth, dev->dev_addr);

	return 0;
}

static int
mx_ether_ethtool(struct net_device *dev, void *uva)
{
	struct ethtool_value val;
	struct mx_ether *eth = dev->priv;
	mx_instance_state_t *is = eth->is;
	uint32_t cmd;
	int error = 0;

	if (copy_from_user(&cmd, uva, sizeof (cmd)))
		return -EFAULT;

	switch (cmd) {
	case ETHTOOL_GDRVINFO: 	{
		struct ethtool_drvinfo info;
		uint32_t mcp_version;
		error = mx_mcpi.get_param(is->id, is->lanai.sram, 
					  "mcp_version", &mcp_version);
		if (error) {
			error = -ENXIO;
			goto abort;
		}
		bzero(&info, sizeof (info));
		info.cmd = ETHTOOL_GDRVINFO;
		strcpy(info.driver, "mx ethernet (myri)");
		sprintf(info.version, "0x%x", MX_MCP_DRIVER_API_VERSION);
		sprintf(info.fw_version, "0x%x", mcp_version);
		strcpy(info.bus_info, mx_pci_name(is->arch.pci_dev));
		if (copy_to_user(uva, &info, sizeof (info)))
			error = -EFAULT;
	}
	break;

#ifdef ETHTOOL_GLINK
	case ETHTOOL_GLINK:
		val.cmd = ETHTOOL_GLINK;
		val.data = is->link_state ? 1:0;
		if (copy_to_user(uva, &val, sizeof (val)))
			error = -EFAULT;
		break;
#endif		

	case ETHTOOL_GRXCSUM: 
		val.cmd = ETHTOOL_GRXCSUM;
		val.data = (eth->csum_flag & MX_MCP_ETHER_FLAGS_CKSUM) != 0;
		if (copy_to_user(uva, &val, sizeof (val)))
			error = -EFAULT;
		break;

	case ETHTOOL_SRXCSUM:
		if (copy_from_user(&val, uva, sizeof (val))) {
			error -= -EFAULT;
			goto abort;
		}
		if (val.data)
			eth->csum_flag |= MX_MCP_ETHER_FLAGS_CKSUM;
		else
			eth->csum_flag &= ~MX_MCP_ETHER_FLAGS_CKSUM;
		break;

	case ETHTOOL_GTXCSUM: 
		val.cmd = ETHTOOL_GTXCSUM;
		val.data = (dev->features & NETIF_F_IP_CSUM) != 0;
		if (copy_to_user(uva, &val, sizeof (val)))
			error = -EFAULT;
		break;

	case ETHTOOL_STXCSUM:
		if (copy_from_user(&val, uva, sizeof (val))) {
			error -= -EFAULT;
			goto abort;
		}
		if (val.data)
			dev->features |= NETIF_F_IP_CSUM;
		else
			dev->features &= ~NETIF_F_IP_CSUM;
		break;

	case ETHTOOL_GSG: 
		val.cmd = ETHTOOL_GSG;
		val.data = (dev->features & NETIF_F_SG) != 0;
		if (copy_to_user(uva, &val, sizeof (val)))
			error = -EFAULT;
		break;

	case ETHTOOL_SSG:
		if (copy_from_user(&val, uva, sizeof (val))) {
			error -= -EFAULT;
			goto abort;
		}
		if (val.data)
			dev->features |= NETIF_F_SG;
		else
			dev->features &= ~NETIF_F_SG;
		break;
#ifdef ETHTOOL_GCOALESCE
		/* note that we don't distinguish between transmit and
		   receive interrupt coalescing.  Nor do we (yet) have
		   a concept of forcing an irq if more the N packets
		   have been received */
	case ETHTOOL_GCOALESCE: {
		struct ethtool_coalesce coal;

		bzero(&coal, sizeof (coal));
		error = mx_mcpi.get_param(is->id, is->lanai.sram, 
					  "intr_coal_delay",
					  &coal.rx_coalesce_usecs);
		if (error) {
			error = -ENXIO;
			goto abort;
		}
		coal.tx_coalesce_usecs = coal.rx_coalesce_usecs;
		if (copy_to_user(uva, &coal, sizeof (coal)))
			error = -EFAULT;
	} break;

	case ETHTOOL_SCOALESCE: {
		struct ethtool_coalesce coal;
		uint32_t old_val, new_val = 0;

		error = mx_mcpi.get_param(is->id, is->lanai.sram, 
					  "intr_coal_delay",
					  &old_val);
		if (error)
			goto abort;

		if (copy_from_user(&coal, uva, sizeof (coal))) {
			error -= -EFAULT;
			goto abort;
		}

		if (coal.tx_coalesce_usecs != old_val)
			new_val = coal.tx_coalesce_usecs;
		else if (coal.rx_coalesce_usecs != old_val)
			new_val = coal.rx_coalesce_usecs;
		
		if (new_val == 0  || new_val > 1000) {
			error = -EINVAL;
			goto abort;
		}
		error = mx_mcpi.set_param(is->id, is->lanai.sram, 
					  "intr_coal_delay",
					  new_val);
		if (error)
			error = -ENXIO;	
	} break;
#endif /* ETHTOOL_GCOALESCE */

	default:
		error = -EOPNOTSUPP;
		break;
	}

abort:
	return error;

}

/*
 * do_ioctl entry point.  Only SIOCETHTOOL is handled, by passing the
 * request's ifr_data pointer to mx_ether_ethtool(); everything else
 * returns -EOPNOTSUPP.
 */
static int
mx_ether_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	if (cmd == SIOCETHTOOL)
		return mx_ether_ethtool(dev, (void *) ifr->ifr_data);

	/* put link-state checking here */

	return -EOPNOTSUPP;
}


/* init entry point: nothing to do here, always reports success. */
static int
mx_ether_init(struct net_device *dev)
{
	int status = 0;

	return status;
}


/*
 * Reflect is->link_state in the net_device's carrier state.  Does
 * nothing when no ethernet device is attached to this instance.
 */
void
mx_ether_link_change_notify(mx_instance_state_t *is)
{
	if (is->ether != NULL) {
		if (!is->link_state)
			netif_carrier_off(is->ether->arch.dev);
		else
			netif_carrier_on(is->ether->arch.dev);
	}
}

/*
 * Create and register the ethernet net_device for an MX instance.
 *
 * Allocates the driver's private state and a net_device named
 * "myri<id>", links the two to `is`, fills in the entry points,
 * MAC address and feature flags, and registers the device.
 *
 * Returns 0 on success.  On failure everything allocated here is
 * released, is->ether is left NULL and an error is returned.
 * NOTE(review): the private-state allocation failure returns a
 * positive ENOMEM while the other failures return negative values --
 * confirm what callers expect before unifying.
 */
int 
mx_ether_attach(mx_instance_state_t *is)
{
	struct net_device *dev;
	struct mx_ether *eth;
	int err, i;

	eth = mx_kmalloc((sizeof *eth), MX_MZERO|MX_NOWAIT);
	if (!eth)
		return ENOMEM;	

	eth->csum_flag = (mx_ether_csum ? MX_MCP_ETHER_FLAGS_CKSUM : 0);
	dev = mx_netdev_alloc("myri%d", &err, is->id);
	if (!dev) {
		MX_WARN(("myri%d: Alloc netdev failed\n", is->id));
		mx_kfree(eth);
		return err;
	}
	ether_setup(dev);
	/* setup all the pointers.. */
	eth->is = is;
	eth->arch.dev = dev;
	dev->priv = eth;
	/* make the links above visible before publishing eth */
	MX_STBAR();
	is->ether = eth;

	mx_ether_link_change_notify(is);
	dev->mtu = MX_MAX_ETHER_MTU - ETH_HLEN;
	dev->open = mx_ether_open;
	dev->stop = mx_ether_close;
	dev->hard_start_xmit = mx_ether_xmit;
	dev->get_stats = mx_ether_get_stats;
	dev->base_addr = pci_resource_start(is->arch.pci_dev, 0);
	dev->irq = is->arch.irq;
	dev->init = mx_ether_init;
	dev->change_mtu = mx_ether_change_mtu;
	dev->set_multicast_list = mx_ether_set_multicast_list;
	dev->set_mac_address = mx_ether_set_mac_address;
	dev->tx_timeout = mx_ether_timeout;
	dev->watchdog_timeo = HZ * 2;
	dev->do_ioctl = mx_ether_ioctl;
	for (i = 0; i < 6; i++) {
		eth->current_mac[i] = dev->dev_addr[i]= is->mac_addr[i];
	}

	dev->features = NETIF_F_SG| (mx_ether_csum ? NETIF_F_IP_CSUM : 0) | NETIF_F_HIGHDMA;
	if (PAGE_SIZE != 4096 && is->board_type == MX_BOARD_TYPE_Z) {
		dev->features &= ~NETIF_F_SG;
		MX_WARN(("SG disabled for > 4K pages: contact help@myri.com\n"));
	}

	if (mx_ether_rx_frags) {
		MX_INFO(("%s: Will use skbuf frags to receive big frames\n", dev->name));
	}	
	if ((err = register_netdev(dev))) {
		MX_WARN(("%s:register_netdev failed!!\n", dev->name));
		/* unpublish the state before freeing it, in the same
		   order as mx_ether_detach(), so that nothing can
		   follow is->ether into freed memory */
		is->ether = NULL;
		MX_STBAR();
		mx_netdev_free(dev);
		mx_kfree(eth);
	}
	return err;
}

/*
 * Tear down the ethernet device of an MX instance, if there is one:
 * unregister it, clear is->ether, then free the net_device and the
 * private state.
 */
void
mx_ether_detach(mx_instance_state_t *is)
{
	struct mx_ether *eth = is->ether;
	struct net_device *dev;

	if (eth == NULL)
		return;

	dev = eth->arch.dev;
	unregister_netdev(dev);

	/* unpublish before the memory goes away */
	is->ether = 0;
	MX_STBAR();

	mx_netdev_free(dev);
	mx_kfree(eth);
}


/*
 * Close the ethernet device if it is attached and flagged IFF_UP.
 * Returns 1 when the device was closed here, 0 when there was nothing
 * to do.
 */
int
mx_ether_parity_detach(mx_instance_state_t *is)
{
	struct mx_ether *eth = is->ether;

	if (eth == NULL || !(eth->arch.dev->flags & IFF_UP))
		return 0;

	MX_WARN(("Detaching myri%d\n", is->id));
	mx_ether_close(eth->arch.dev);
	return 1;
}

/*
 * Re-open the ethernet device of an MX instance.  is->ether is used
 * without a NULL check; the result of mx_ether_open() is not examined.
 */
void
mx_ether_parity_reattach(mx_instance_state_t *is)
{
	struct net_device *dev = is->ether->arch.dev;

	MX_WARN(("re-attaching myri%d\n", is->id));
	mx_ether_open(dev);
}

/*
  This file uses MX driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/
